package com.suyunyou.spider.plugins.page;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.suyunyou.spider.model.PageDtl;
import com.suyunyou.spider.model.SpiderException;
import com.suyunyou.spider.plugins.IPagePlugin;

/**
 * Plugin for crawling Suyunyou article pages: extracts a page's title and body
 * from the fetched HTML using jsoup CSS selectors.<br>
 * Test class for checking whether a page and its rules can be crawled: PageDtlPluginTest.java
 * @author yuejing
 * @date 2016-06-25 16:45:30
 * @version V1.0.0
 */
public class PageDtlPlugin extends IPagePlugin {

	private static final Logger LOGGER = LoggerFactory.getLogger(PageDtlPlugin.class);

	/** CSS selector used to locate the title element(s) in the parsed page. */
	private final String titleSelect;

	/** CSS selector used to locate the body element(s) in the parsed page. */
	private final String contentSelect;

	/**
	 * Creates a plugin with the given target pattern and selectors.
	 *
	 * @param regex         value handed to {@code setTargetRegex} of the parent plugin
	 *                      (presumably the URL pattern of pages to extract; confirm against IPagePlugin)
	 * @param titleSelect   CSS selector for the title
	 * @param contentSelect CSS selector for the article body
	 */
	public PageDtlPlugin(String regex, String titleSelect, String contentSelect) {
		this.titleSelect = titleSelect;
		this.contentSelect = contentSelect;
		super.setTargetRegex(regex);
	}

	/**
	 * Parses the current link's content and builds a {@link PageDtl} from it.
	 * The title is the combined text of every element matching {@code titleSelect};
	 * the content is the inner HTML of every element matching {@code contentSelect},
	 * concatenated in document order with no separator.
	 *
	 * @return the extracted page detail, or {@code null} when the link has no content
	 *         or no element matches {@code contentSelect}
	 * @throws SpiderException declared by the overridden method; not thrown directly here
	 */
	@Override
	public PageDtl setPageDtl() throws SpiderException {
		// NOTE(review): assumes getContent() returns the raw HTML as a String - confirm.
		String html = getLink().getContent();
		if (html == null) {
			// Nothing was fetched for this link; treat it like a page without a body
			// instead of letting the parser fail on null input.
			return null;
		}
		Document doc = Jsoup.parse(html);
		Elements conts = doc.select(contentSelect);
		if (conts.isEmpty()) {
			// No article body found
			return null;
		}
		PageDtl dtl = new PageDtl();
		dtl.setTitle(doc.select(titleSelect).text());
		// Elements.html() would insert newlines between matches, so append each
		// element's inner HTML manually to keep the separator-free result.
		StringBuilder content = new StringBuilder();
		for (Element element : conts) {
			content.append(element.html());
		}
		dtl.setContent(content.toString());
		LOGGER.info("找到文章啦 ~ 我要写入数据库中。哈哈 [{}]", getLink().getLink());
		return dtl;
	}

}